htseq-count input

directory <- "/Users/zhangbohan/Documents/R/file_all"
sampleFiles <- grep("lobe",list.files(directory),value=TRUE)
sampleCondition <- sub("(.*lobe).*","\\1",sampleFiles)
sampleTable <- data.frame(sampleName = sampleFiles,
                          fileName = sampleFiles,
                          condition = sampleCondition)
sampleTable$condition <- factor(sampleTable$condition)

build the DESeqDataSet

library("DESeq2")
dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
                                       directory = directory,
                                       design= ~ condition)
dds
class: DESeqDataSet 
dim: 60483 215 
metadata(1): version
assays(1): counts
rownames(60483): ENSG00000000003.13 ENSG00000000005.5 ... ENSGR0000280767.1 ENSGR0000281849.1
rowData names(0):
colnames(215): lowerlobe-TCGA-18-3421-01A-01R-0980-07.count lowerlobe-TCGA-18-4721-01A-01R-1443-07.count ...
  upperlobe-TCGA-NJ-A4YQ-01A-11R-A262-07.count upperlobe-TCGA-NJ-A55R-01A-11R-A262-07.count
colData names(1): condition

Pre-filtering

keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]

Note on factor levels

dds$condition <- factor(dds$condition, levels = c("upperlobe","lowerlobe"))
dds$condition <- droplevels(dds$condition)

results table

dds <- DESeq(dds)
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
-- replacing outliers and refitting for 7234 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)
estimating dispersions
fitting model and testing
dds
class: DESeqDataSet 
dim: 50475 215 
metadata(1): version
assays(6): counts mu ... replaceCounts replaceCooks
rownames(50475): ENSG00000000003.13 ENSG00000000005.5 ... ENSG00000281918.1 ENSG00000281920.1
rowData names(23): baseMean baseVar ... maxCooks replace
colnames(215): lowerlobe-TCGA-18-3421-01A-01R-0980-07.count lowerlobe-TCGA-18-4721-01A-01R-1443-07.count ...
  upperlobe-TCGA-NJ-A4YQ-01A-11R-A262-07.count upperlobe-TCGA-NJ-A55R-01A-11R-A262-07.count
colData names(3): condition sizeFactor replaceable

Independent hypothesis weighting (p value filtering)

library("IHW")
res <- results(dds, filterFun=ihw, contrast=c("condition","upperlobe","lowerlobe"), alpha=0.05)
res
log2 fold change (MLE): condition upperlobe vs lowerlobe 
Wald test p-value: condition upperlobe vs lowerlobe 
DataFrame with 50475 rows and 7 columns
                     baseMean log2FoldChange     lfcSE       stat    pvalue      padj    weight
                    <numeric>      <numeric> <numeric>  <numeric> <numeric> <numeric> <numeric>
ENSG00000000003.13 3530.58158    -0.14349078 0.1152011 -1.2455675 0.2129232  1.000000  0.470066
ENSG00000000005.5     1.83533     0.92124505 0.3983635  2.3125742 0.0207461  0.304791  1.634073
ENSG00000000419.11 1988.56928    -0.02316500 0.0859819 -0.2694171 0.7876088  1.000000  0.470066
ENSG00000000457.12 1042.05870     0.00836846 0.0679910  0.1230819 0.9020422  1.000000  0.424619
ENSG00000000460.15  610.06415    -0.00423868 0.1112523 -0.0380997 0.9696082  1.000000  0.582609
...                       ...            ...       ...        ...       ...       ...       ...
ENSG00000281909.1    0.557662       0.323687  0.365305   0.886074 0.3755778  1.000000   0.00000
ENSG00000281910.1    0.291194      -0.186074  0.524813  -0.354553 0.7229245  1.000000   0.00000
ENSG00000281912.1   60.828380       0.226105  0.145742   1.551401 0.1208055  0.446624   3.94932
ENSG00000281918.1    2.456463      -0.573625  0.230559  -2.487977 0.0128472  0.239031   1.74462
ENSG00000281920.1    6.473478       0.222705  0.220144   1.011633 0.3117135  0.742966   2.29753

remove the version number of the gene Ensembl number

enemble_id <- substr(row.names(dds),1,15)
rownames(res) <- enemble_id

change Ensembl number of gene in dataframe to gene id

add a column

RawCounts <- res
Ensembl_ID <- data.frame(Ensembl_ID = row.names(RawCounts))
rownames(Ensembl_ID) <- Ensembl_ID[,1]
RawCounts <- cbind(Ensembl_ID,RawCounts)

create a file to associate the ensembl id and gene id

get_map = function(input) {
  if (is.character(input)) {
    if(!file.exists(input)) stop("Bad input file.")
    message("Treat input as file")
    input = data.table::fread(input, header = FALSE)
  } else {
    data.table::setDT(input)
  }
  
  input = input[input[[3]] == "gene", ]
  
  pattern_id = ".*gene_id \"([^;]+)\";.*"
  pattern_name = ".*gene_name \"([^;]+)\";.*"
  
  gene_id = sub(pattern_id, "\\1",input[[9]])
  gene_name = sub(pattern_name, "\\1", input[[9]])
  
  Ensembl_ID_TO_Genename <- data.frame(gene_id = gene_id, gene_name = gene_name, stringsAsFactors = FALSE)
  return(Ensembl_ID_TO_Genename)
}
Ensembl_ID_TO_Genename <- get_map("gencode.v38.basic.annotation.gtf")
Treat input as file
|--------------------------------------------------|
|==================================================|
gtf_Ensembl_ID <- substr(Ensembl_ID_TO_Genename[,1],1,15)
Ensembl_ID_TO_Genename <- data.frame(gtf_Ensembl_ID, Ensembl_ID_TO_Genename[,2])
colnames(Ensembl_ID_TO_Genename) <- c("Ensembl_ID","gene_id")
write.csv(Ensembl_ID_TO_Genename,file = "Ensembl_ID_TO_Genename.csv")

#replace

res_g <-merge(Ensembl_ID_TO_Genename,RawCounts,by="Ensembl_ID")

remove unnecessary columns and duplicate gene ids

res_g <- res_g[order(res_g[,"gene_id"]),]
index <- duplicated(res_g$gene_id)
res_g <- res_g[!index,]
rownames(res_g) <- res_g[,"gene_id"]
res_g <- res_g[,-c(1:2)]

check

head(res_g)

effect size shrinkage

resultsNames(dds)
[1] "Intercept"                        "condition_lowerlobe_vs_upperlobe"

takes long time

resLFC <- lfcShrink(dds,coef="condition_lowerlobe_vs_upperlobe", type="apeglm")
using 'apeglm' for LFC shrinkage. If used in published research, please cite:
    Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
    sequence count data: removing the noise and preserving large differences.
    Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
resLFC
log2 fold change (MAP): condition lowerlobe vs upperlobe 
Wald test p-value: condition lowerlobe vs upperlobe 
DataFrame with 50475 rows and 5 columns
                     baseMean log2FoldChange      lfcSE    pvalue      padj
                    <numeric>      <numeric>  <numeric> <numeric> <numeric>
ENSG00000000003.13 3530.58158    1.13928e-05 0.00144260 0.2129232  0.733175
ENSG00000000005.5     1.83533   -4.76141e-07 0.00144268 0.0207461  0.325044
ENSG00000000419.11 1988.56928   -2.27604e-04 0.00145166 0.7876088  0.963137
ENSG00000000457.12 1042.05870   -3.55648e-06 0.00144237 0.9020422  0.983322
ENSG00000000460.15  610.06415   -7.31588e-06 0.00144258 0.9696082  0.996177
...                       ...            ...        ...       ...       ...
ENSG00000281909.1    0.557662   -3.19932e-06 0.00144269 0.3755778        NA
ENSG00000281910.1    0.291194    3.22278e-07 0.00144269 0.7229245        NA
ENSG00000281912.1   60.828380   -9.78217e-06 0.00144264 0.1208055  0.633484
ENSG00000281918.1    2.456463    8.99681e-06 0.00144268 0.0128472  0.269497
ENSG00000281920.1    6.473478   -4.69361e-06 0.00144267 0.3117135  0.800859

speed-up and parallelization thoughts

library("BiocParallel")
register(MulticoreParam(4))

order results table by p-value

resOrdered <- res_g[order(res_g$pvalue),]
sum(res_g$padj < 0.05, na.rm=TRUE)
[1] 433

Exporting results

MA-plot

plotMA(res, ylim=c(-2,2))

more useful

plotMA(resLFC, ylim=c(-2,2))

resultsNames(dds)
[1] "Intercept"                        "condition_lowerlobe_vs_upperlobe"

3 shrinkage estimator

resAsh takes long time

resNorm <- lfcShrink(dds, coef=2, type="normal")
using 'normal' for LFC shrinkage, the Normal prior from Love et al (2014).

Note that type='apeglm' and type='ashr' have shown to have less bias than type='normal'.
See ?lfcShrink for more details on shrinkage type, and the DESeq2 vignette.
Reference: https://doi.org/10.1093/bioinformatics/bty895
resAsh <- lfcShrink(dds, coef=2, type="ashr")
using 'ashr' for LFC shrinkage. If used in published research, please cite:
    Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
    https://doi.org/10.1093/biostatistics/kxw041
par(mfrow=c(1,3), mar=c(4,4,2,1))
xlim <- c(1,1e5); ylim <- c(-3,3)
plotMA(resLFC, xlim=xlim, ylim=ylim, main="apeglm")
plotMA(resNorm, xlim=xlim, ylim=ylim, main="normal")
plotMA(resAsh, xlim=xlim, ylim=ylim, main="ashr")

plot counts

plotCounts(dds, gene=which.min(res$padj), intgroup="condition")

d <- plotCounts(dds, gene=which.min(res$padj), intgroup="condition", 
                returnData=TRUE)
library("ggplot2")

Attaching package: ‘ggplot2’

The following object is masked from ‘package:IHW’:

    alpha
ggplot(d, aes(x=condition, y=count)) + 
  geom_point(position=position_jitter(w=0.1,h=0)) + 
  scale_y_log10(breaks=c(25,100,400))

more information

mcols(res)$description
[1] "mean of normalized counts for all samples"               
[2] "log2 fold change (MLE): condition upperlobe vs lowerlobe"
[3] "standard error: condition upperlobe vs lowerlobe"        
[4] "Wald statistic: condition upperlobe vs lowerlobe"        
[5] "Wald test p-value: condition upperlobe vs lowerlobe"     
[6] "Weighted BH adjusted p-values"                           
[7] "IHW weights"                                             

finally write csv

getwd()
[1] "/Users/zhangbohan/Documents/R/script"
write.csv(as.data.frame(res_g), 
          file="res_g.csv")

differential expression gene p<0.05

resSig_0.05 <- subset(resOrdered, padj < 0.05)
resSig_0.05[which(resSig_0.05$log2FoldChange > 0), "up_down"] <- "up"
resSig_0.05[which(resSig_0.05$log2FoldChange < 0), "up_down"] <- "down"
resSig_0.05
getwd()
[1] "/Users/zhangbohan/Documents/R/script"
write.csv(as.data.frame(resSig_0.05), 
          file="differential_expression.csv")

data transformations and visualization

vsd data transformations

vsd <- vst(dds, blind=FALSE)

rld data transformations

takes too much time and have no results

rld <- rlog(dds, blind=FALSE)
head(assay(vsd), 3)

change of standard deviation

# this gives log2(n + 1)
ntd <- normTransform(dds)
library("vsn")
meanSdPlot(assay(ntd))

meanSdPlot(assay(vsd))

need rlg

library("RColorBrewer")
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- paste(vsd$condition, vsd$type, sep="-")
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
         clustering_distance_rows=sampleDists,
         clustering_distance_cols=sampleDists,
         col=colors, show_rownames = FALSE)

data quality assessment by sample clustering and visualization

heatmap of ntd

show_colnames = FALSE

library("pheatmap")
select <- order(rowMeans(counts(dds,normalized=TRUE)),
                decreasing=TRUE)[1:20]
df <- as.data.frame(colData(dds)[,c("condition", "sizeFactor")])
pheatmap(assay(ntd)[select,], cluster_rows=FALSE, show_rownames=FALSE,
         cluster_cols=FALSE, annotation_col=df, show_colnames = FALSE)

heatmap of vsd

pheatmap(assay(vsd)[select,], cluster_rows=FALSE, show_rownames=FALSE,
         cluster_cols=FALSE, annotation_col=df, show_colnames = FALSE)

heatmap of rlg

need rlg

pheatmap(assay(rld)[select,], cluster_rows=FALSE, show_rownames=FALSE,
         cluster_cols=FALSE, annotation_col=df)

Heatmap of the sample-to-sample distances

sampleDists <- dist(t(assay(vsd)))

show_rownames = FALSE

library("RColorBrewer")
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- paste(vsd$condition, vsd$type, sep="-")
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
         clustering_distance_rows=sampleDists,
         clustering_distance_cols=sampleDists,
         col=colors, show_rownames = FALSE)

principal component plot of the samples

plotPCA(vsd, intgroup="condition")

same thing

pcaData <- plotPCA(vsd, intgroup="condition", returnData=TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, color=condition)) +
  geom_point(size=3) +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) + 
  coord_fixed()
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQojIGh0c2VxLWNvdW50IGlucHV0CmBgYHtyfQpkaXJlY3RvcnkgPC0gIn4vRG9jdW1lbnRzL1IvZmlsZV9hbGwiCmBgYAoKYGBge3J9CnNhbXBsZUZpbGVzIDwtIGdyZXAoImxvYmUiLGxpc3QuZmlsZXMoZGlyZWN0b3J5KSx2YWx1ZT1UUlVFKQpgYGAKCmBgYHtyfQpzYW1wbGVDb25kaXRpb24gPC0gc3ViKCIoLipsb2JlKS4qIiwiXFwxIixzYW1wbGVGaWxlcykKYGBgCgpgYGB7cn0Kc2FtcGxlVGFibGUgPC0gZGF0YS5mcmFtZShzYW1wbGVOYW1lID0gc2FtcGxlRmlsZXMsCiAgICAgICAgICAgICAgICAgICAgICAgICAgZmlsZU5hbWUgPSBzYW1wbGVGaWxlcywKICAgICAgICAgICAgICAgICAgICAgICAgICBjb25kaXRpb24gPSBzYW1wbGVDb25kaXRpb24pCnNhbXBsZVRhYmxlJGNvbmRpdGlvbiA8LSBmYWN0b3Ioc2FtcGxlVGFibGUkY29uZGl0aW9uKQpgYGAKIyBidWlsZCB0aGUgREVTZXFEYXRhU2V0CmBgYHtyfQpsaWJyYXJ5KCJERVNlcTIiKQpkZHMgPC0gREVTZXFEYXRhU2V0RnJvbUhUU2VxQ291bnQoc2FtcGxlVGFibGUgPSBzYW1wbGVUYWJsZSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZGlyZWN0b3J5ID0gZGlyZWN0b3J5LAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBkZXNpZ249IH4gY29uZGl0aW9uKQpkZHMKYGBgCiMgUHJlLWZpbHRlcmluZwpgYGB7cn0Ka2VlcCA8LSByb3dTdW1zKGNvdW50cyhkZHMpKSA+PSAxMApkZHMgPC0gZGRzW2tlZXAsXQpgYGAKIyBOb3RlIG9uIGZhY3RvciBsZXZlbHMKYGBge3J9CmRkcyRjb25kaXRpb24gPC0gZmFjdG9yKGRkcyRjb25kaXRpb24sIGxldmVscyA9IGMoInVwcGVybG9iZSIsImxvd2VybG9iZSIpKQpgYGAKCmBgYHtyfQpkZHMkY29uZGl0aW9uIDwtIGRyb3BsZXZlbHMoZGRzJGNvbmRpdGlvbikKYGBgCiMgcmVzdWx0cyB0YWJsZQpgYGB7cn0KZGRzIDwtIERFU2VxKGRkcykKYGBgCmBgYHtyfQpkZHMKYGBgCgojIEluZGVwZW5kZW50IGh5cG90aGVzaXMgd2VpZ2h0aW5nIChwIHZhbHVlIGZpbHRlcmluZykKYGBge3J9CmxpYnJhcnkoIklIVyIpCnJlcyA8LSByZXN1bHRzKGRkcywgZmlsdGVyRnVuPWlodywgY29udHJhc3Q9YygiY29uZGl0aW9uIiwidXBwZXJsb2JlIiwibG93ZXJsb2JlIiksIGFscGhhPTAuMDUpCnJlcwpgYGAKIyByZW1vdmUgdGhlIHZlcnNpb24gbnVtYmVyIG9mIHRoZSBnZW5lIEVuc2VtYmwgbnVtYmVyCmBgYHtyfQplbmVtYmxlX2lkIDwtIHN1YnN0cihyb3cubmFtZXMoZGRzKSwxLDE1KQpyb3duYW1lcyhyZXMpIDwtIGVuZW1ibGVfaWQKYGBgCiMgY2hhbmdlIEVuc2VtYmwgbnVtYmVyIG9mIGdlbmUgaW4gZGF0YWZyYW1lIHRvIGdlbmUgaWQKIyMgYWRkIGEgY29sdW1uIApgYGB7cn0KUmF3Q291bnRzIDwtIHJlcwpFbnNlbWJsX0lEIDwtIGRhdGEuZnJhbWUoRW5zZW1ibF9JRCA9IHJvdy5uYW1lcyhSYXdDb3VudHMpKQpyb3duYW1lcyhFbnNlbWJsX0lEKSA8LSBFbnNlbWJsX0lEWywxXQpSYXdDb3VudHMgPC0gY2JpbmQoRW5zZW1ibF9JRCxSYXdDb3VudHMpCmBgYAojIyBjcmVhdGUgYSBmaWxlIHRvIGFzc29jaWF0ZSB0aGUgZW5zZW1ibCBpZCBhbmQgZ2VuZSBpZApgYGB7cn0KZ2V0X21hcCA9IGZ1bmN0aW9uKGlucHV0KSB7CiAgaWYgKGlzLmNoYXJhY3RlcihpbnB1dCkpIHsKICAgIGlmKCFmaWxlLmV4aXN0cyhpbnB1dCkpIHN0b3AoIkJhZCBpbnB1dCBmaWxlLiIpCiAgICBtZXNzYWdlKCJUcmVhdCBpbnB1dCBhcyBmaWxlIikKICAgIGlucHV0ID0gZGF0YS50YWJsZTo6ZnJlYWQoaW5wdXQsIGhlYWRlciA9IEZBTFNFKQogIH0gZWxzZSB7CiAgICBkYXRhLnRhYmxlOjpzZXREVChpbnB1dCkKICB9CiAgCiAgaW5wdXQgPSBpbnB1dFtpbnB1dFtbM11dID09ICJnZW5lIiwgXQogIAogIHBhdHRlcm5faWQgPSAiLipnZW5lX2lkIFwiKFteO10rKVwiOy4qIgogIHBhdHRlcm5fbmFtZSA9ICIuKmdlbmVfbmFtZSBcIihbXjtdKylcIjsuKiIKICAKICBnZW5lX2lkID0gc3ViKHBhdHRlcm5faWQsICJcXDEiLGlucHV0W1s5XV0pCiAgZ2VuZV9uYW1lID0gc3ViKHBhdHRlcm5fbmFtZSwgIlxcMSIsIGlucHV0W1s5XV0pCiAgCiAgRW5zZW1ibF9JRF9UT19HZW5lbmFtZSA8LSBkYXRhLmZyYW1lKGdlbmVfaWQgPSBnZW5lX2lkLCBnZW5lX25hbWUgPSBnZW5lX25hbWUsIHN0cmluZ3NBc0ZhY3RvcnMgPSBGQUxTRSkKICByZXR1cm4oRW5zZW1ibF9JRF9UT19HZW5lbmFtZSkKfQpFbnNlbWJsX0lEX1RPX0dlbmVuYW1lIDwtIGdldF9tYXAoImdlbmNvZGUudjM4LmJhc2ljLmFubm90YXRpb24uZ3RmIikKCmd0Zl9FbnNlbWJsX0lEIDwtIHN1YnN0cihFbnNlbWJsX0lEX1RPX0dlbmVuYW1lWywxXSwxLDE1KQpFbnNlbWJsX0lEX1RPX0dlbmVuYW1lIDwtIGRhdGEuZnJhbWUoZ3RmX0Vuc2VtYmxfSUQsIEVuc2VtYmxfSURfVE9fR2VuZW5hbWVbLDJdKQpjb2xuYW1lcyhFbnNlbWJsX0lEX1RPX0dlbmVuYW1lKSA8LSBjKCJFbnNlbWJsX0lEIiwiZ2VuZV9pZCIpCndyaXRlLmNzdihFbnNlbWJsX0lEX1RPX0dlbmVuYW1lLGZpbGUgPSAiRW5zZW1ibF9JRF9UT19HZW5lbmFtZS5jc3YiKQpgYGAKI3JlcGxhY2UKYGBge3J9CnJlc19nIDwtbWVyZ2UoRW5zZW1ibF9JRF9UT19HZW5lbmFtZSxSYXdDb3VudHMsYnk9IkVuc2VtYmxfSUQiKQpgYGAKIyMgcmVtb3ZlIHVubmVjZXNzYXJ5IGNvbHVtbnMgYW5kIGR1cGxpY2F0ZSBnZW5lIGlkcwpgYGB7cn0KcmVzX2cgPC0gcmVzX2dbb3JkZXIocmVzX2dbLCJnZW5lX2lkIl0pLF0KaW5kZXggPC0gZHVwbGljYXRlZChyZXNfZyRnZW5lX2lkKQpyZXNfZyA8LSByZXNfZ1shaW5kZXgsXQpyb3duYW1lcyhyZXNfZykgPC0gcmVzX2dbLCJnZW5lX2lkIl0KcmVzX2cgPC0gcmVzX2dbLC1jKDE6MildCmBgYAojIGNoZWNrCmBgYHtyfQpoZWFkKHJlc19nKQpgYGAKIyBlZmZlY3Qgc2l6ZSBzaHJpbmthZ2UKYGBge3J9CnJlc3VsdHNOYW1lcyhkZHMpCmBgYAojIyB0YWtlcyBsb25nIHRpbWUKYGBge3J9CnJlc0xGQyA8LSBsZmNTaHJpbmsoZGRzLGNvZWY9ImNvbmRpdGlvbl9sb3dlcmxvYmVfdnNfdXBwZXJsb2JlIiwgdHlwZT0iYXBlZ2xtIikKcmVzTEZDCmBgYAojIHNwZWVkLXVwIGFuZCBwYXJhbGxlbGl6YXRpb24gdGhvdWdodHMKYGBge3J9CmxpYnJhcnkoIkJpb2NQYXJhbGxlbCIpCnJlZ2lzdGVyKE11bHRpY29yZVBhcmFtKDQpKQpgYGAKIyBvcmRlciByZXN1bHRzIHRhYmxlIGJ5IHAtdmFsdWUKYGBge3J9CnJlc09yZGVyZWQgPC0gcmVzX2dbb3JkZXIocmVzX2ckcHZhbHVlKSxdCmBgYAoKYGBge3J9CnN1bShyZXNfZyRwYWRqIDwgMC4wNSwgbmEucm09VFJVRSkKYGBgCgojIEV4cG9ydGluZyByZXN1bHRzCiMjIE1BLXBsb3QKYGBge3J9CnBsb3RNQShyZXMsIHlsaW09YygtMiwyKSkKYGBgCiMgbW9yZSB1c2VmdWwKYGBge3J9CnBsb3RNQShyZXNMRkMsIHlsaW09YygtMiwyKSkKYGBgCgpgYGB7cn0KcmVzdWx0c05hbWVzKGRkcykKYGBgCiMgMyBzaHJpbmthZ2UgZXN0aW1hdG9yCiMjIHJlc0FzaCB0YWtlcyBsb25nIHRpbWUKYGBge3J9CnJlc05vcm0gPC0gbGZjU2hyaW5rKGRkcywgY29lZj0yLCB0eXBlPSJub3JtYWwiKQpyZXNBc2ggPC0gbGZjU2hyaW5rKGRkcywgY29lZj0yLCB0eXBlPSJhc2hyIikKYGBgCgpgYGB7cn0KcGFyKG1mcm93PWMoMSwzKSwgbWFyPWMoNCw0LDIsMSkpCnhsaW0gPC0gYygxLDFlNSk7IHlsaW0gPC0gYygtMywzKQpwbG90TUEocmVzTEZDLCB4bGltPXhsaW0sIHlsaW09eWxpbSwgbWFpbj0iYXBlZ2xtIikKcGxvdE1BKHJlc05vcm0sIHhsaW09eGxpbSwgeWxpbT15bGltLCBtYWluPSJub3JtYWwiKQpwbG90TUEocmVzQXNoLCB4bGltPXhsaW0sIHlsaW09eWxpbSwgbWFpbj0iYXNociIpCmBgYAojIHBsb3QgY291bnRzCmBgYHtyfQpwbG90Q291bnRzKGRkcywgZ2VuZT13aGljaC5taW4ocmVzJHBhZGopLCBpbnRncm91cD0iY29uZGl0aW9uIikKYGBgCgpgYGB7cn0KZCA8LSBwbG90Q291bnRzKGRkcywgZ2VuZT13aGljaC5taW4ocmVzJHBhZGopLCBpbnRncm91cD0iY29uZGl0aW9uIiwgCiAgICAgICAgICAgICAgICByZXR1cm5EYXRhPVRSVUUpCmxpYnJhcnkoImdncGxvdDIiKQpnZ3Bsb3QoZCwgYWVzKHg9Y29uZGl0aW9uLCB5PWNvdW50KSkgKyAKICBnZW9tX3BvaW50KHBvc2l0aW9uPXBvc2l0aW9uX2ppdHRlcih3PTAuMSxoPTApKSArIAogIHNjYWxlX3lfbG9nMTAoYnJlYWtzPWMoMjUsMTAwLDQwMCkpCmBgYAojIG1vcmUgaW5mb3JtYXRpb24KYGBge3J9Cm1jb2xzKHJlcykkZGVzY3JpcHRpb24KYGBgCgojIGZpbmFsbHkgd3JpdGUgY3N2CmBgYHtyfQpnZXR3ZCgpCndyaXRlLmNzdihhcy5kYXRhLmZyYW1lKHJlc19nKSwgCiAgICAgICAgICBmaWxlPSJyZXNfZy5jc3YiKQpgYGAKIyBkaWZmZXJlbnRpYWwgZXhwcmVzc2lvbiBnZW5lIHA8MC4wNQpgYGB7cn0KcmVzU2lnXzAuMDUgPC0gc3Vic2V0KHJlc09yZGVyZWQsIHBhZGogPCAwLjA1KQpyZXNTaWdfMC4wNVt3aGljaChyZXNTaWdfMC4wNSRsb2cyRm9sZENoYW5nZSA+IDApLCAidXBfZG93biJdIDwtICJ1cCIKcmVzU2lnXzAuMDVbd2hpY2gocmVzU2lnXzAuMDUkbG9nMkZvbGRDaGFuZ2UgPCAwKSwgInVwX2Rvd24iXSA8LSAiZG93biIKcmVzU2lnXzAuMDUKYGBgCgpgYGB7cn0KZ2V0d2QoKQp3cml0ZS5jc3YoYXMuZGF0YS5mcmFtZShyZXNTaWdfMC4wNSksIAogICAgICAgICAgZmlsZT0iZGlmZmVyZW50aWFsX2V4cHJlc3Npb24uY3N2IikKYGBgCgojIGRhdGEgdHJhbnNmb3JtYXRpb25zIGFuZCB2aXN1YWxpemF0aW9uCiMjIHZzZCBkYXRhIHRyYW5zZm9ybWF0aW9ucwpgYGB7cn0KdnNkIDwtIHZzdChkZHMsIGJsaW5kPUZBTFNFKQpgYGAKIyMgcmxkIGRhdGEgdHJhbnNmb3JtYXRpb25zCiMjIHRha2VzIHRvbyBtdWNoIHRpbWUgYW5kIGhhdmUgbm8gcmVzdWx0cwpgYGB7cn0KcmxkIDwtIHJsb2coZGRzLCBibGluZD1GQUxTRSkKYGBgCgpgYGB7cn0KaGVhZChhc3NheSh2c2QpLCAzKQpgYGAKIyMgY2hhbmdlIG9mIHN0YW5kYXJkIGRldmlhdGlvbgpgYGB7cn0KIyB0aGlzIGdpdmVzIGxvZzIobiArIDEpCm50ZCA8LSBub3JtVHJhbnNmb3JtKGRkcykKbGlicmFyeSgidnNuIikKbWVhblNkUGxvdChhc3NheShudGQpKQpgYGAKCmBgYHtyfQptZWFuU2RQbG90KGFzc2F5KHZzZCkpCmBgYAojIyMgbmVlZCBybGcKYGBge3J9Cm1lYW5TZFBsb3QoYXNzYXkocmxkKSkKYGBgCiMgZGF0YSBxdWFsaXR5IGFzc2Vzc21lbnQgYnkgc2FtcGxlIGNsdXN0ZXJpbmcgYW5kIHZpc3VhbGl6YXRpb24KIyMgaGVhdG1hcCBvZiBudGQKIyMjIHNob3dfY29sbmFtZXMgPSBGQUxTRQpgYGB7cn0KbGlicmFyeSgicGhlYXRtYXAiKQpzZWxlY3QgPC0gb3JkZXIocm93TWVhbnMoY291bnRzKGRkcyxub3JtYWxpemVkPVRSVUUpKSwKICAgICAgICAgICAgICAgIGRlY3JlYXNpbmc9VFJVRSlbMToyMF0KZGYgPC0gYXMuZGF0YS5mcmFtZShjb2xEYXRhKGRkcylbLGMoImNvbmRpdGlvbiIsICJzaXplRmFjdG9yIildKQpwaGVhdG1hcChhc3NheShudGQpW3NlbGVjdCxdLCBjbHVzdGVyX3Jvd3M9RkFMU0UsIHNob3dfcm93bmFtZXM9RkFMU0UsCiAgICAgICAgIGNsdXN0ZXJfY29scz1GQUxTRSwgYW5ub3RhdGlvbl9jb2w9ZGYsIHNob3dfY29sbmFtZXMgPSBGQUxTRSkKYGBgCiMjIGhlYXRtYXAgb2YgdnNkCmBgYHtyfQpwaGVhdG1hcChhc3NheSh2c2QpW3NlbGVjdCxdLCBjbHVzdGVyX3Jvd3M9RkFMU0UsIHNob3dfcm93bmFtZXM9RkFMU0UsCiAgICAgICAgIGNsdXN0ZXJfY29scz1GQUxTRSwgYW5ub3RhdGlvbl9jb2w9ZGYsIHNob3dfY29sbmFtZXMgPSBGQUxTRSkKYGBgCiMjIGhlYXRtYXAgb2YgcmxnCiMjIyBuZWVkIHJsZwpgYGB7cn0KcGhlYXRtYXAoYXNzYXkocmxkKVtzZWxlY3QsXSwgY2x1c3Rlcl9yb3dzPUZBTFNFLCBzaG93X3Jvd25hbWVzPUZBTFNFLAogICAgICAgICBjbHVzdGVyX2NvbHM9RkFMU0UsIGFubm90YXRpb25fY29sPWRmKQpgYGAKIyMgSGVhdG1hcCBvZiB0aGUgc2FtcGxlLXRvLXNhbXBsZSBkaXN0YW5jZXMKYGBge3J9CnNhbXBsZURpc3RzIDwtIGRpc3QodChhc3NheSh2c2QpKSkKYGBgCiMjIyBzaG93X3Jvd25hbWVzID0gRkFMU0UKYGBge3J9CmxpYnJhcnkoIlJDb2xvckJyZXdlciIpCnNhbXBsZURpc3RNYXRyaXggPC0gYXMubWF0cml4KHNhbXBsZURpc3RzKQpyb3duYW1lcyhzYW1wbGVEaXN0TWF0cml4KSA8LSBwYXN0ZSh2c2QkY29uZGl0aW9uLCB2c2QkdHlwZSwgc2VwPSItIikKY29sbmFtZXMoc2FtcGxlRGlzdE1hdHJpeCkgPC0gTlVMTApjb2xvcnMgPC0gY29sb3JSYW1wUGFsZXR0ZSggcmV2KGJyZXdlci5wYWwoOSwgIkJsdWVzIikpICkoMjU1KQpwaGVhdG1hcChzYW1wbGVEaXN0TWF0cml4LAogICAgICAgICBjbHVzdGVyaW5nX2Rpc3RhbmNlX3Jvd3M9c2FtcGxlRGlzdHMsCiAgICAgICAgIGNsdXN0ZXJpbmdfZGlzdGFuY2VfY29scz1zYW1wbGVEaXN0cywKICAgICAgICAgY29sPWNvbG9ycywgc2hvd19yb3duYW1lcyA9IEZBTFNFKQpgYGAKIyMgcHJpbmNpcGFsIGNvbXBvbmVudCBwbG90IG9mIHRoZSBzYW1wbGVzCmBgYHtyfQpwbG90UENBKHZzZCwgaW50Z3JvdXA9ImNvbmRpdGlvbiIpCmBgYAojIyBzYW1lIHRoaW5nCmBgYHtyfQpwY2FEYXRhIDwtIHBsb3RQQ0EodnNkLCBpbnRncm91cD0iY29uZGl0aW9uIiwgcmV0dXJuRGF0YT1UUlVFKQpwZXJjZW50VmFyIDwtIHJvdW5kKDEwMCAqIGF0dHIocGNhRGF0YSwgInBlcmNlbnRWYXIiKSkKZ2dwbG90KHBjYURhdGEsIGFlcyhQQzEsIFBDMiwgY29sb3I9Y29uZGl0aW9uKSkgKwogIGdlb21fcG9pbnQoc2l6ZT0zKSArCiAgeGxhYihwYXN0ZTAoIlBDMTogIixwZXJjZW50VmFyWzFdLCIlIHZhcmlhbmNlIikpICsKICB5bGFiKHBhc3RlMCgiUEMyOiAiLHBlcmNlbnRWYXJbMl0sIiUgdmFyaWFuY2UiKSkgKyAKICBjb29yZF9maXhlZCgpCmBgYAoK